###############################################################################
# This file provides code for  
#
# Moore, Ryan T. and Nirmala Ravishankar. ``Who Loses in Direct Democracy?''.  
# Social Science Research, 41(3):646-656, May 2012.
#
# 1978 - 2000 Analyses
###############################################################################

setwd("../../whoLosesDataverse/")
library(MASS)

load("data/diffdis.RData")
print(dim(diffdis))
##[1] 195019     35
print(length(unique(diffdis$prop)))
##[1] 51
print(195019/51)
print(c(min(diffdis$year), max(diffdis$year)))

## TABLE 1, column 1:
print(round(apply(diffdis,2,mean, na.rm=T),3))

## Define NA counting function
nasum <- function(x){   
  return(sum(is.na(x)))
}

## Missing cells:
##  '27' is individ data, plus margin + winner
print(round(sum(apply(diffdis, 2, nasum))/(nrow(diffdis)*27),3))
##[1] 0.093
## OR, only vars (and omitted categories) used in regressions:
diffdis.tmp <- diffdis[,c("winner", "racewh", "racebl", "racehi",
                          "raceas", "inchi", "incmed", "inclow",
                          "edno", "edhs", "edcol", "edba", "agec1",
                          "agec2", "agec3", "agec4", "sex", "margin",
                          "regla", "regbay", "regsc", "regnc",
                          "regcv", "libcon", "pdem", "poth", "prep")]
round(sum(apply(diffdis.tmp, 1, nasum))/(nrow(diffdis.tmp)*27),3)
##[1] 0.093  identical, since no missingness in other agg data

## Define function to calculate missing data by units:
muns <- function(dat){
  nnn <- nmis <- nwin <- nlos <- nwm <- nlm <- array(NA,4)
  ccc <- 1
  for(i in c("wh", "bl","hi", "as")){
    v <- paste("race",i,sep="")
    tmp <- dat[dat[,v] %in% c(1),]
    nnn[ccc] <- nrow(tmp)
    nmis[ccc] <- sum(apply(tmp,1,nasum)>0)
    wd <- tmp[tmp$winner==1,]
    ld <- tmp[tmp$winner==0,]
    nwin[ccc] <- nrow(wd)
    nlos[ccc] <- nrow(ld)
    nwm[ccc] <- sum(apply(wd,1,nasum)>0)
    nlm[ccc] <- sum(apply(ld,1,nasum)>0)
    ccc <- ccc+1
  }
  return(list(nnn=nnn, nmis=nmis, nwin=nwin, nlos=nlos, nwm=nwm, nlm=nlm))
}
m <- muns(diffdis.tmp)

## Race missingness Overall
print(round(sum(m$nmis[2:4])/sum(m$nnn[2:4]), 3))  ##[1] 0.45
print(round(m$nmis[1]/m$nnn[1], 3))  ##[1] 0.465
print(prop.test(c(m$nmis[1], sum(m$nmis[2:4])), c(m$nnn[1], sum(m$nnn[2:4]))))
##X-squared = 23.9799, df = 1, p-value = 9.734e-07

## Win/Lose missingness by race
for(i in 1:4){
  print(prop.test(c(m$nwm[i], m$nlm[i]), c(m$nwin[i], m$nlos[i])))
}

## Win/Lose missingness Overall
wd <- diffdis.tmp[diffdis.tmp$winner==1,]
ld <- diffdis.tmp[diffdis.tmp$winner==0,]
rm(diffdis.tmp)
round(mean(apply(wd,1,nasum)>0),3)
##[1] 0.483
round(mean(apply(ld,1,nasum)>0),3)
##[1] 0.511
print(prop.test(c(sum(apply(wd,1,nasum)>0), sum(apply(ld,1,nasum)>0)),
          n=c(nrow(wd), nrow(ld))))
##X-squared = 148.8304, df = 1, p-value < 2.2e-16
rm(wd,ld)

## TABLE 6, Column 1
rep.table1 <- glm(winner ~ racebl + racehi + raceas + inchi + incmed +
edhs + edcol + edba + agec1 + agec4 + sex + margin + regla + regbay +
regsc + libcon+ pdem + poth, data = diffdis, family = binomial(logit))

print(summary(rep.table1))
## Same number of observations, 118477.

## Corrected HGL TABLE 1: include agec2, regnc

t1.corr <- glm(winner ~ racebl + racehi + raceas + inchi + incmed + edhs +
edcol + edba + agec1 + agec2 + agec4 + sex + margin + regla + regbay +
                  regsc + regnc + libcon+ pdem + poth, data = diffdis,
                  family = binomial(logit)) 

library(MASS)
nsims <- 5000
## Corrected:
set.seed(28493)
beta.sim <- mvrnorm(nsims, t1.corr$coefficients, summary(t1.corr)$cov.unscaled)

## Hypothetical voter profiles.
## For corrected, include agec2, agec2=1, and regnc
white <- c(1,0,0,0,0,1,0,1,0,0,1,0,0,12.1,0,0,0,0,2,0,1)
black <- c(1,1,0,0,0,1,0,1,0,0,1,0,0,12.1,0,0,0,0,2,0,1)
latino <-c(1,0,1,0,0,1,0,1,0,0,1,0,0,12.1,0,0,0,0,2,0,1)
asian <- c(1,0,0,1,0,1,0,1,0,0,1,0,0,12.1,0,0,0,0,2,0,1)
## Concatenating
hyp.covars <- matrix(rbind(white, black, latino, asian), nrow=4)
## corrected renaming:
colnames(hyp.covars) <- names(coef(t1.corr))
rownames(hyp.covars) <- c("white", "black", "latino", "asian")
## Clean up
rm(white, black, latino, asian)

## Table 5, Column 1
## Create fitted values
fits <- 1/(1+exp(-hyp.covars%*%t(beta.sim)))
## Means
print(round(apply(fits, 1, mean),3))

## 95% CI's 
quant.func <- function(x){
  quantile(x, c(0.025, 0.975))
}
## CI's
print(round(apply(fits, 1, quant.func),3))

## Table 5, Column 2
##First Differences
first.diffs <- matrix(NA, 4, nsims)
rownames(first.diffs) <- c("white", "black", "latino", "asian")
for(i in 1:nrow(fits)){
  first.diffs[i,] <- fits[i,]-fits[1,]
}
print(round(apply(first.diffs,1,mean),3))
print(round(apply(first.diffs,1,quant.func),3))
##

imp1 <- data.frame(scan("data/zol1.txt", what=list(numeric(0),numeric(0),numeric(0),numeric(0),numeric(0),numeric(0), numeric(0),numeric(0),numeric(0),numeric(0)), skip=2, multi.line=T))

imp2 <- data.frame(scan("data/zol2.txt", what=list(numeric(0),numeric(0),numeric(0),numeric(0),numeric(0),numeric(0), numeric(0),numeric(0),numeric(0),numeric(0)), skip=2, multi.line=T))

imp3 <- data.frame(scan("data/zol3.txt", what=list(numeric(0),numeric(0),numeric(0),numeric(0),numeric(0),numeric(0), numeric(0),numeric(0),numeric(0),numeric(0)), skip=2, multi.line=T))

imp4 <- data.frame(scan("data/zol4.txt", what=list(numeric(0),numeric(0),numeric(0),numeric(0),numeric(0),numeric(0), numeric(0),numeric(0),numeric(0),numeric(0)), skip=2, multi.line=T))

imp5 <- data.frame(scan("data/zol5.txt", what=list(numeric(0),numeric(0),numeric(0),numeric(0),numeric(0),numeric(0), numeric(0),numeric(0),numeric(0),numeric(0)), skip=2, multi.line=T))

## Define function that expands imputed datasets
process.imputed <- function(imp){

  ## Column names
  names(imp) <- c("libcon", "sex", "margin", "winner", "race",
                  "region", "party", "age", "income", "ed")  

  ## Adds some fully-observed columns
  imp$year <- diffdis$year
  imp$votwhc <- diffdis$votwhc
  imp$votblc <- diffdis$votblc
  imp$vothic <- diffdis$vothic
  imp$votasc <- diffdis$votasc
  imp$typetop5 <- diffdis$typetop5
  imp$prop <- diffdis$prop

  ##  Re-dummifies variables that were collapsed for imputation,
  imp$racewh <- imp$racebl <- imp$racehi <- imp$raceas <- 0
  imp$edno <- imp$edhs <- imp$edcol <- imp$edba <- 0
  imp$regcv <- imp$regla <- imp$regbay <- imp$regsc <- imp$regnc <- 0
  imp$prep <- imp$pdem <- imp$poth <- 0
  imp$agec1 <- imp$agec2 <- imp$agec3 <- imp$agec4 <- 0
  imp$inclow <- imp$incmed <- imp$inchi <- 0

  imp$racewh[imp$race==1] <- 1
  imp$racebl[imp$race==2] <- 1
  imp$racehi[imp$race==3] <- 1
  imp$raceas[imp$race==4] <- 1

  imp$edno[imp$ed==1] <- 1
  imp$edhs[imp$ed==2] <- 1
  imp$edcol[imp$ed==3] <- 1
  imp$edba[imp$ed==4] <- 1  

  imp$regcv[imp$region == 1] <- 1
  imp$regla[imp$region == 2] <- 1
  imp$regbay[imp$region == 3] <- 1
  imp$regsc[imp$region == 4] <- 1
  imp$regnc[imp$region == 5] <- 1

  imp$prep[imp$party == 1] <- 1
  imp$pdem[imp$party == 2] <- 1
  imp$poth[imp$party == 3] <- 1

  imp$agec1[imp$age == 1] <- 1
  imp$agec2[imp$age == 2] <- 1
  imp$agec3[imp$age == 3] <- 1
  imp$agec4[imp$age == 4] <- 1

  imp$inclow[imp$income == 1] <- 1
  imp$incmed[imp$income == 2] <- 1
  imp$inchi[imp$income == 3] <- 1

  return(imp)
}  

imp.1 <- process.imputed(imp1)
imp.2 <- process.imputed(imp2)
imp.3 <- process.imputed(imp3)
imp.4 <- process.imputed(imp4)
imp.5 <- process.imputed(imp5)

## Clean up non-expanded imps
rm(imp1, imp2, imp3, imp4, imp5)

## Table 1, Column 2
print(round(apply(rbind(imp.1, imp.2, imp.3, imp.4, imp.5)[,c(1:4,18:40)], 2, mean),3))

imps <- list(imp.1 = imp.1, imp.2 = imp.2, imp.3 = imp.3, imp.4 =
             imp.4, imp.5 = imp.5)
## clean up individual imputations
rm(imp.1, imp.2, imp.3, imp.4, imp.5)

## Define function to make combined betas from all imputations, then draw
mk.beta2 <- function(imp.list){
  D <- length(imp.list)
  beta.store <- se.store <- matrix(NA, D, 21)
  
  for(i in 1:D){

    res <- glm(winner ~ racebl + racehi + raceas + inchi + incmed +
               edhs + edcol + edba + agec1 + agec2 + agec4 + sex +
               margin + regla + regbay + regsc + regnc + libcon + pdem
               + poth, data = imp.list[[i]], family = binomial(logit)) 

    beta.store[i,] <- res$coefficients
    colnames(beta.store) <- names(res$coefficients)
    se.store[i,] <- (summary(res)$coef)[,2]
  }
  b <- apply(beta.store, 2, mean)
  var.w <- apply((se.store)^2, 2, mean)
  quadforms <- matrix(NA, D, 21)
  for(j in 1:D){
    quadforms[j,] <- (beta.store[j,]-b)^2
  }
  var.b <- (1/(D-1))*apply(quadforms,2, sum)
  se <- sqrt(var.w + ((D+1)/D)*var.b)

  return(list(b=b, se=se, beta.store=beta.store))
}

## For imputations, where prop type has only one prop.  Omit margin.
mk.beta3 <- function(imp.list){
  D <- length(imp.list)
  beta.store <- se.store <- matrix(NA, D, 20)
  
  for(i in 1:D){

    res <- glm(winner ~ racebl + racehi + raceas + inchi + incmed +
               edhs + edcol + edba + agec1 + agec2 + agec4 + sex +
               regla + regbay + regsc + regnc + libcon + pdem
               + poth, data = imp.list[[i]], family = binomial(logit)) 

    beta.store[i,] <- res$coefficients
    colnames(beta.store) <- names(res$coefficients)
    se.store[i,] <- (summary(res)$coef)[,2]
  }
  b <- apply(beta.store, 2, mean)
  var.w <- apply((se.store)^2, 2, mean)
  quadforms <- matrix(NA, D, 20)
  for(j in 1:D){
    quadforms[j,] <- (beta.store[j,]-b)^2
  }
  var.b <- (1/(D-1))*apply(quadforms,2, sum)
  se <- sqrt(var.w + ((D+1)/D)*var.b)

  return(list(b=b, se=se, beta.store=beta.store))
}

ests <- mk.beta2(imps)

## beta simulations for imputed data
nsims <- 5000
set.seed(352)
b.sims <- mvrnorm(nsims, ests$b, diag(ests$se^2))

## TABLE 5, Column 3
## Create fitted values
fits2 <- 1/(1+exp(-hyp.covars%*%t(b.sims)))
## Means
print(round(apply(fits2, 1, mean),3))
## CIs
print(round(apply(fits2, 1, quant.func),3))

## TABLE 5, Column 4
##First Differences
first.diffs2 <- matrix(NA, 4, nsims)
rownames(first.diffs2) <- c("white", "black", "latino", "asian")
for(i in 1:nrow(fits2)){
  first.diffs2[i,] <- fits2[i,]-fits2[1,]
}
## Means
print(round(apply(first.diffs2,1,mean),3))
## CIs
print(round(apply(first.diffs2,1,quant.func),3))

## Figure 3 
opar <- par()
par(mfrow=c(2,1), mar=(c(2.5,4,2,1)+.1))
plot(density(first.diffs[2,]), xlim=c(-.06,0.02), ylim=c(0,90),
     xlab="", main="Before Imputation", axes=F)
par(new=T)
plot(density(first.diffs[3,]), xlim=c(-.06,0.02), ylim=c(0,90), lty=2,
     xlab="", main="", lwd=2, axes=F) 
par(new=T)
plot(density(first.diffs[4,]), xlim=c(-.06,0.02), ylim=c(0,90), lty=3,
     xlab="", main="", axes=F)
axis(1, labels=F)
axis(2)
abline(v=0, col="grey")
text(-.01, 70, "Blacks")
text(-.032, 73, "Latinos")
text(-.005, 22,"Asians")
par(new=F)
par(mar=(c(4,4,2,1)+.1))
plot(density(first.diffs2[2,]), xlim=c(-.06,0.02), ylim=c(0,90),
     xlab="", main="After Imputation", axes=F)
par(new=T)
plot(density(first.diffs2[3,]), xlim=c(-.06,0.02), ylim=c(0,90), lty=2,
     xlab="", main="", lwd=2, axes=F) 
par(new=T)
plot(density(first.diffs2[4,]), xlim=c(-.06,0.02), ylim=c(0,90), lty=3,
     xlab="Differences in Predicted Probabilities", main="", axes=F)
axis(1)
axis(2)
abline(v=0, col="grey")
text(-.012, 70, "Blacks")
text(-.04, 70, "Latinos")
text(-.047, 13,"Asians")


## Minority Targeted props in these data: 63, 187, 209, 227
##   create datasets w/o min targ props
diffdis.nm <- diffdis[!diffdis$prop %in% c(63, 187,209,227),]
imps.nm <- list()
for(i in 1:length(imps)){
  imps.nm[[i]] <- imps[[i]][!imps[[i]]$prop %in% c(63, 187,209,227),]
  names(imps.nm)[i] <- paste("imp",i,".nm", sep="")
}

## Min Targ: diffdis data
res.nm <- glm(winner ~ racebl + racehi + raceas + inchi + incmed +
              edhs + edcol + edba + agec1 + agec2 + agec4 + sex +
              margin + regla + regbay + regsc + regnc + libcon + pdem +
              poth, data = diffdis.nm, family = binomial(logit))

set.seed(817)
beta.sim <- mvrnorm(nsims, res.nm$coefficients, summary(res.nm)$cov.unscaled) 

## TABLE 4, Column 1
## Create fitted values
fits3 <- 1/(1+exp(-hyp.covars%*%t(beta.sim)))
## Means
print(round(apply(fits3, 1, mean),3))
## CIs
print(round(apply(fits3, 1, quant.func),3))
rm(beta.sim)

## TABLE 4, Column 2
##First Differences
first.diffs3 <- matrix(NA, 4, nsims)
rownames(first.diffs3) <- c("white", "black", "latino", "asian")
for(i in 1:nrow(fits3)){
  first.diffs3[i,] <- fits3[i,]-fits3[1,]
}
## Means
print(round(apply(first.diffs3, 1, mean),3))
## CIs
print(round(apply(first.diffs3, 1, quant.func),3))

## Min Targ: imp data
ests.min <- mk.beta2(imps.nm)
nsims <- 5000
set.seed(9)
b.sims <- mvrnorm(nsims, ests.min$b, diag(ests.min$se^2))

## TABLE 4, Column 3
## Create fitted values
fits4 <- 1/(1+exp(-hyp.covars%*%t(b.sims)))
## Means
print(round(apply(fits4, 1, mean),3))
## CIs
print(round(apply(fits4, 1, quant.func),3))
##      white black latino asian
rm(b.sims)

## TABLE 4, Column 4
## First Differences
first.diffs4 <- matrix(NA, 4, nsims)
rownames(first.diffs4) <- c("white", "black", "latino", "asian")
for(i in 1:nrow(fits4)){
  first.diffs4[i,] <- fits4[i,]-fits4[1,]
}
## Means
round(apply(first.diffs4,1,mean),3)
## CIs
round(apply(first.diffs4,1,quant.func),3)
## END TABLE 4
rm(nsims)

## Figure 2
par(mfrow=c(2,1), mar=(c(2.5,4,2,1)+.1))
plot(density(first.diffs3[2,]), xlim=c(-.05,0.05), ylim=c(0,90),
     xlab="", main="Before Imputation", axes=F)
par(new=T)
plot(density(first.diffs3[3,]), xlim=c(-.05,0.05), ylim=c(0,90), lty=2,
     xlab="", main="", lwd=2, axes=F) 
par(new=T)
plot(density(first.diffs3[4,]), xlim=c(-.05,0.05), ylim=c(0,90), lty=3,
     xlab="", main="", axes=F)
axis(1, labels=F)
axis(2)
abline(v=0, col="grey")
text(-.02, 60, "Blacks")
text(.01, 72, "Latinos")
text(.013, 16,"Asians")
par(new=F)
par(mar=(c(4,4,2,1)+.1))
plot(density(first.diffs4[2,]), xlim=c(-.05,0.05), ylim=c(0,90),
     xlab="", main="After Imputation", axes=F)
par(new=T)
plot(density(first.diffs4[3,]), xlim=c(-.05,0.05), ylim=c(0,90), lty=2,
     xlab="", main="", lwd=2, axes=F) 
par(new=T)
plot(density(first.diffs4[4,]), xlim=c(-.05,0.05), ylim=c(0,90), lty=3,
     xlab="Differences in Predicted Probabilities", main="", axes=F)
axis(1)
axis(2)
abline(v=0, col="grey")
text(-.027, 70, "Blacks")
text(-.008, 88, "Latinos")
text(-.039, 25,"Asians")

## Marginal associations, listwise deletion
dd.supercalc <- function(diffdis){
  ## means
  dd.agg <- aggregate(diffdis$winner, list(wh=diffdis$racewh, bl=diffdis$racebl,
                           hi=diffdis$racehi, as=diffdis$raceas), mean)
  ## n
  dd.n <- apply(diffdis[,c("racewh", "racebl", "racehi", "raceas")], 2,
               sum, na.rm=T) 
  ## se(mean)
  dd.se <- sqrt((dd.agg$x[2:5]*(1-dd.agg$x[2:5]))/dd.n)
  ## 95% CI
  ub <- dd.agg$x[2:5] + qnorm(.025, lower.tail=F)*dd.se
  lb <- dd.agg$x[2:5] - qnorm(.025, lower.tail=F)*dd.se
  dd.ci <- rbind(lb,ub)
  
  ## Create table
  dd.margtab <- matrix(NA, 2*ncol(dd.ci), 1)
  for(i in 1:ncol(dd.ci)){
    dd.margtab[2*i-1,1] <- round(dd.agg$x[i+1], 3)
    dd.margtab[2*i,1] <- noquote(paste("(",round(t(dd.ci)[i,1], 3), ", ",
                            round(t(dd.ci)[i,2], 3), ")", sep=""))
    dd.margtab <- noquote(dd.margtab)
  }

  dd.margtab <- as.matrix(dd.margtab[c(3:8,1:2),])
  dd.margtab <- cbind(dd.margtab, NA)
  for(i in c(1,3,5)){
    dd.margtab[i,2] <-
      round(as.numeric(dd.margtab[i,1])-as.numeric(dd.margtab[7,1]),3)
  }
  dd.margtab[7:8,2] <- ""

  ## SE for pi_1 - pi_2  
  dd.sediff <- sqrt((dd.agg$x[3:5]*(1-dd.agg$x[3:5]))/dd.n[2:4] +
                   (dd.agg$x[2]*(1-dd.agg$x[2]))/dd.n[1])
  ## 95% CI for pi_1 - pi_2
  ub <- (dd.agg$x[3:5]-dd.agg$x[2]) + qnorm(.025, lower.tail=F)*dd.sediff
  lb <- (dd.agg$x[3:5]-dd.agg$x[2]) - qnorm(.025, lower.tail=F)*dd.sediff
  dd.cidiff <- rbind(lb,ub)
  for(i in 1:3){
    dd.margtab[2*i,2] <- noquote(paste("(",round(t(dd.cidiff)[i,1], 3), ", ",
                            round(t(dd.cidiff)[i,2], 3), ")", sep=""))
  }  
  return(list(dd.agg=dd.agg, dd.n=dd.n, dd.se=dd.se, dd.ci=dd.ci,
              dd.sediff=dd.sediff, dd.cidiff=dd.cidiff,
              dd.margtab=dd.margtab))
}

## TABLE 3, columns 1 and 2:
## Marginal assocs, all props
dd.margallsuper <- dd.supercalc(diffdis)
## TABLE 2, columns 1 and 2:
## Marginal assocs, excluding min targeted props
dd.margminsuper <- dd.supercalc(diffdis.nm)
print(dd.margallsuper$dd.margtab)
print(dd.margminsuper$dd.margtab)

## Marginal associations, multiple imputation
margeffimp <- function(implist){
  D <- length(implist)
  aggs <- ns <- list()
  for(i in 1:D){
    aggs[[i]] <- aggregate(implist[[i]]$winner, list(wh=implist[[i]]$racewh,
                                              bl=implist[[i]]$racebl,
                                              hi=implist[[i]]$racehi,
                                              as=implist[[i]]$raceas),
                           mean)
    ns[[i]] <- apply(implist[[i]][,c("racewh", "racebl", "racehi",
                                     "raceas")], 2, sum)
  }  
  return(list(aggs=aggs, ns=ns))
}

imp.supercalc <- function(imps){
  margimpout <- margeffimp(imps)
  aggs <- margimpout$aggs
  agg <- (aggs[[1]] + aggs[[2]] + aggs[[3]] + aggs[[4]] + aggs[[5]])/5

  ## imputed mean estimates
  imp.margtab <- matrix(NA, 2*(ncol(agg)-1), 2)
  imp.margtab[c(1,3,5,7),1] <- round(agg$x[c(2:4,1)], 3)
  imp.margtab[c(1,3,5),2] <- round(imp.margtab[c(1,3,5),1] -
                                   imp.margtab[7,1],3) 

  ## imputed variance estimates
  D <- length(margimpout$aggs)
  Wd.mat <- matrix(NA, D, 4)
  for(i in 1:D){
    Wd.mat[i,] <- (margimpout$aggs[[i]]$x *
                   (1-margimpout$aggs[[i]]$x))/margimpout$ns[[i]]
  }
  Wd <- apply(Wd.mat, 2, mean)
  Bd.mat <- matrix(NA, D, 4)
  for(i in 1:D){
    Bd.mat[i,] <- (margimpout$aggs[[i]]$x - agg$x)^2
  }
  Bd <- apply(Bd.mat, 2, sum)
  Bd <- Bd/(D-1)
  Td <- Wd + ((D+1)/D)*Bd
  imp.se <- sqrt(Td)

  ## imputed 95% CI
  ub <-  agg$x + qnorm(.025, lower.tail=F)*imp.se
  lb <-  agg$x - qnorm(.025, lower.tail=F)*imp.se
  imp.ci <- rbind(lb,ub)

  ## Store imputation estimates in imp.margtab
  for(i in 1:ncol(imp.ci)){
    imp.margtab[2*i,1] <- noquote(paste("(",round(t(imp.ci)[i,1], 3),", ",
                                        round(t(imp.ci)[i,2], 3), ")", sep=""))
  }
  ## rearrange, puT white CI at bottom, others slide up
  imp.margtab <- imp.margtab[c(1,4,3,6,5,8,7,2),]
  imp.margtab[7:8,2] <- ""

  ## Imputed first diffs, SE for pi_1 - pi_2
  imp.sediff <- sqrt((imp.se^2)[2:4]+(imp.se^2)[1])
  ## 95% CI
  ub <- (agg$x[2:4]-agg$x[1]) + qnorm(.025, lower.tail=F)*imp.sediff
  lb <- (agg$x[2:4]-agg$x[1]) - qnorm(.025, lower.tail=F)*imp.sediff
  imp.cidiff <- rbind(lb,ub)

  for(i in 1:3){
    imp.margtab[2*i,2] <- noquote(paste("(",round(t(imp.cidiff)[i,1], 3), ", ",
                            round(t(imp.cidiff)[i,2], 3), ")", sep=""))
  }
  
  return(list(agg=agg, imp.se=imp.se, imp.ci=imp.ci,
              imp.sediff=imp.sediff, imp.cidiff=imp.cidiff,
              imp.margtab=imp.margtab, D=D, Bd=Bd, Td=Td))  
}

## TABLE 3, columns 3 and 4:
## Marginal assocs, all props
imp.margallsuper <- imp.supercalc(imps)
## TABLE 2, columns 3 and 4:
## Marginal assocs, excluding min targeted props
imp.margminsuper <- imp.supercalc(imps.nm)
print(imp.margallsuper$imp.margtab)
print(imp.margminsuper$imp.margtab)

## average difference between LD and MI (Tables 1 & 2) ~ 0.6%
##  (weights from Summary Stats, Table C1)
print(weighted.mean(c(-.048,-.043,-.048),w=c(6.7, 8, 3.2)) -
  weighted.mean(c(-.041,-.039,-.037), w =c(7.8, 9.2, 3.9)))
##[1] -0.006392157
## Fraction of race cells missing, <7%:
apply(diffdis,2, nasum)
##  libcon   racewh   racebl   racehi   raceas
##   17323    13017    13017    13017    13017 
print(13017/195019)
##[1] 0.06674734

## MISSING INFO
## Fraction of missing info
tmp <- imp.margallsuper
gammaD <- ((1+1/tmp$D)*tmp$Bd)/tmp$Td
rm(tmp)
round(gammaD,3)  ##[1] 0.056 0.179 0.136 0.377

## Missing info barplot (not in article)
par(mfrow = c(1,1))
par(mar=(c(2.5,4,2,1)+.1))
barplot(gammaD, names.arg=c("Whites", "Blacks",
                                       "Latinos", "Asians"),
        xlab="", space=.2, ylab="Fraction of Missing Information",
        axes=F, main="Missing Information about the Probability of Winning", ylim=c(0,.5))
axis(2)
par(mar=(c(4,4,2,1)+.1))
rm(gammaD)

## Weights Robustness Check
## See Section 3.1
library(foreign)
zbig <- read.dta("data/bigfileznewsmall.dta")
## 92 observations are empty
zbig <- zbig[!(is.na(zbig$nobs)),]
## Count missing weights
print(nasum(zbig$obswt))
## Calculate missing weight as proportion of total nobs/race
print(round((aggregate(zbig$obswt, list(wh=zbig$racewh, bl=zbig$racebl,
                            hi=zbig$racehi, as=zbig$raceas),
           nasum)$x[2:5])/apply(zbig[,c("racewh", "racebl", "racehi",
                                        "raceas")], 2, sum, na.rm=T), 4))
##  racewh racebl racehi raceas 
##  0.0831 0.0714 0.1197 0.1069 

## Remove NA weights
zbig <- zbig[!(is.na(zbig$obswt)),]

library(survey)
dd.des <- svydesign(ids=~0, variables=~winner+racewh+racebl+racehi+raceas,
weights=~obswt, data=zbig)

## Use svyby to get group estimates
surv.wh <- svyby(~winner, ~racewh==1, design = dd.des, svymean)[,c("winner", "se")]["TRUE",]
##  0.608     0.001
surv.bl <- svyby(~winner, ~racebl==1, design = dd.des, svymean)[,c("winner", "se")]["TRUE",]
##   0.553     0.005
surv.hi <- svyby(~winner, ~racehi==1, design = dd.des, svymean)[,c("winner", "se")]["TRUE",]
##  0.562     0.005
surv.as <- svyby(~winner, ~raceas==1, design = dd.des, svymean)[,c("winner", "se")]["TRUE",]
##    0.551     0.008

survs <- list(surv.wh=surv.wh, surv.bl=surv.bl, surv.hi=surv.hi, surv.as=surv.as)
rm(dd.des, surv.wh, surv.bl, surv.hi, surv.as)
## Ests
w.est <- matrix(NA, 1, length(survs))
colnames(w.est) <- names(survs)
for(i in 1:length(survs)){
  w.est[1,i] <- as.numeric(survs[[i]][1])
}
print(round(w.est, 3))

## CIs
w.ci <- matrix(NA, 2, length(survs))
colnames(w.ci) <- c("wh", "bl", "hi", "as")
rownames(w.ci) <- c("lb", "ub")
for(i in 1:length(survs)){
  w.ci[1,i] <- as.numeric(survs[[i]][1] - qnorm(.025, lower.tail=F)*survs[[i]][2])
  w.ci[2,i] <- as.numeric(survs[[i]][1] + qnorm(.025, lower.tail=F)*survs[[i]][2])
}
print(round(w.ci,3))

## First diffs
w.diff <- matrix(NA, 1, ncol(w.ci)-1)
colnames(w.diff) <- colnames(w.ci)[2:length(colnames(w.ci))]
for(i in 2:length(survs)){
  w.diff[1,i-1] <- round(as.numeric(survs[[i]][1]-survs[[1]][1]),3)
}
print(round(w.diff,3))
## CIs
w.cidiff <- matrix(NA, 2, length(w.diff))
colnames(w.cidiff) <- c("bl", "hi", "as")
rownames(w.cidiff) <- c("lb", "ub")
w.sediff <- matrix(NA, 2, length(w.diff))
colnames(w.sediff) <- c("bl", "hi", "as")

for(i in 1:length(w.diff)){
  w.cidiff[1,i] <- w.diff[i] - (qnorm(.025, lower.tail=F)*
    sqrt(as.numeric(survs[[i+1]][2]^2+survs[[1]][2]^2)))
  w.cidiff[2,i] <- w.diff[i] + (qnorm(.025, lower.tail=F)*
    sqrt(as.numeric(survs[[i+1]][2]^2+survs[[1]][2]^2)))
}
print(round(w.cidiff,3))
rm(survs)

## Section 4 ##

## Reload zbig for types
zbig <- read.dta("data/bigfileznewsmall.dta")
## 92 observations are empty
zbig <- zbig[!(is.na(zbig$nobs)),]
## Attach types numerical (n=16) to diffdis, 5 imputed sets.
diffdis <- cbind(diffdis, type1=zbig$type1)
for(i in 1:length(imps)){
	imp.tmp <- imps[[i]]
	imp.tmp <- cbind(imp.tmp, type1=zbig$type1)
	imps[[i]] <- imp.tmp
}

## How many types?  For Table 5.
print(length(unique(zbig$type1)))  ## [1] 16
print(51/16)  ## [1] 3.1875

## Translate types numerical to single char string.
typ.names <- array(NA,length(zbig$type1))
typ.names[zbig$type1==1] <- "Education"
typ.names[zbig$type1==2] <- "Health"
typ.names[zbig$type1==4] <- "Housing"
typ.names[zbig$type1==5] <- "Transit"
typ.names[zbig$type1==6] <- "Environment"
typ.names[zbig$type1==8] <- "Tax"
typ.names[zbig$type1==10] <- "Business"
typ.names[zbig$type1==13] <- "National"
typ.names[zbig$type1==14] <- "Safety"
typ.names[zbig$type1==15] <- "Morality"
typ.names[zbig$type1==16]  <- "Gay"
typ.names[zbig$type1==17] <- "Language"
typ.names[zbig$type1==18] <- "Race"
typ.names[zbig$type1==19] <- "Fiscal"
typ.names[zbig$type1==20] <- "Elections"
typ.names[zbig$type1==21] <- "Admin" 

## TYPES on ORIGINAL (diffdis)
diffdis <- cbind(diffdis, type1n=typ.names)
## TYPES on IMPUTED (imps)
for(i in 1:length(imps)){
	imp.tmp <- imps[[i]]
	imp.tmp <- cbind(imp.tmp, type1n=typ.names)
	imps[[i]] <- imp.tmp
}
rm(i, imp.tmp, typ.names)

## Coefficient calcs for each prop type.
## Margin removed for single-prop types.
trcalc <- function(imps){
	trlist <- list()
	for(ii in unique(imps$imp.1$type1n)){
		mat.pr <- mat.fd <- matrix(NA, 4,3)
		idx <- which(imps$imp.1$type1n==ii)
		imps.tmp <- list()
		for(jj in 1:length(imps)){
			imps.tmp[[jj]] <- imps[[jj]][idx,]
		}
		if(length(unique(imps$imp.1$prop[imps$imp.1$type1n==ii]))>1){
			ests <- mk.beta2(imps.tmp)
		}else{ests <- mk.beta3(imps.tmp)}
		nsims <- 5000
		set.seed(200)
		b.sims <- mvrnorm(nsims, ests$b, diag(ests$se^2))
		if(length(unique(imps$imp.1$prop[imps$imp.1$type1n==ii]))>1){
			fits <- 1/(1+exp(-hyp.covars%*%t(b.sims)))
		}else{white <- c(1,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,2,0,1)
			black <- c(1,1,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,2,0,1)
			latino <-c(1,0,1,0,0,1,0,1,0,0,1,0,0,0,0,0,0,2,0,1)	
			asian <- c(1,0,0,1,0,1,0,1,0,0,1,0,0,0,0,0,0,2,0,1)
			hyp.tmp <- matrix(rbind(white, black, latino, asian), nrow=4)
			colnames(hyp.tmp) <- names(coef(t1.corr))[c(1:13,15:21)]
			rownames(hyp.tmp) <- c("white", "black", "latino", "asian")
			rm(white, black, latino, asian)
			fits <- 1/(1+exp(-hyp.tmp%*%t(b.sims)))
		}
		mat.pr[,1]<- round(apply(fits,1,mean),3)
		mat.pr[,2:3] <- round(apply(fits,1,quant.func),3)
		fds <- matrix(NA, 4,nsims)
		rownames(fds) <- c("white", "black", "latino", "asian")
		for(kk in 1:nrow(fits)){
			fds[kk,] <- fits[kk,]-fits[1,]
		}
		mat.fd[,1] <- round(apply(fds, 1, mean),3)
		mat.fd[,2:3] <- t(round(apply(fds, 1, quant.func),3))
		trlist[[ii]] <- mat.fd 
		rownames(trlist[[ii]]) <- c("wh","bl","hi","as")
		colnames(trlist[[ii]]) <- c("mean","lb","ub")
		cat(ii,"\n")
	}
	return(trlist)
}

## For original data
trcalc.dd <- function(diffdis){
	trlist <- list()
	for(ii in unique(diffdis$type1n)[!(unique(diffdis$type1n) %in% 
			c("Housing", "Race"))]){
		mat.pr <- mat.fd <- matrix(NA, 4,3)
		idx <- which(diffdis$type1n==ii)
		dd.tmp <- diffdis[idx,]

                dd.tmpsmall <- dd.tmp[,c("racebl", "racehi", "raceas",
                "inchi", "incmed", "edhs", "edcol", "edba", "agec1",
                "agec2", "agec4", "sex", "margin", "regla", "regbay",
                "regsc", "regnc", "libcon", "pdem", "poth", "winner")]

                dd.tmpsmall <- dd.tmpsmall[apply(dd.tmpsmall, 1, nasum)<1,]
		## Since "Housing" has nrow==0 here, "Housing" excluded above.
		## Since "Race" has nrow==0 here, "Race" excluded above.
		##  ("Race" is Prop 209, all region info missing.)
		if(length(unique(dd.tmpsmall$margin))>1){
                  reg.tmp <- glm(winner ~ racebl + racehi + raceas +
				inchi + incmed + edhs + edcol + edba +
				agec1 + agec2 + agec4 + sex + margin +
				regla + regbay + regsc + regnc +
				libcon+ pdem + poth, data =
				dd.tmpsmall, family = binomial(logit))
                }else{
                  reg.tmp <- glm(winner ~ racebl + racehi + raceas +
				inchi + incmed + edhs + edcol + edba +
				agec1 + agec2 + agec4 + sex + regla +
				regbay + regsc + regnc + libcon+ pdem
				+ poth, data = dd.tmpsmall, family =
				binomial(logit))
                }
		## Type = "Transit" includes lin dep regnc
		if(is.na(reg.tmp$coefficients["regnc"])){
                  reg.tmp <- glm(winner ~ racebl + racehi + raceas +
				inchi + incmed + edhs + edcol + edba +
				agec1 + agec2 + agec4 + sex + regla +
				regbay + regsc + libcon+ pdem + poth,
				data = dd.tmpsmall, family =
				binomial(logit))
                }	
		nsims <- 5000
		set.seed(200)
		beta.sim <- mvrnorm(nsims, reg.tmp$coefficients, 
				summary(reg.tmp)$cov.unscaled)
		if(length(reg.tmp$coefficients)==21){
			fits <- 1/(1+exp(-hyp.covars%*%t(beta.sim)))
		}else{
			if(length(reg.tmp$coefficients)==20){
                          white <- c(1,0,0,0,0,1,0,1,0,0,1,0,0,0,
                                     0,0,0,2,0,1)
                          black <- c(1,1,0,0,0,1,0,1,0,0,1,0,0,0,
                                     0,0,0,2,0,1)
			  latino <-c(1,0,1,0,0,1,0,1,0,0,1,0,0,0,
                                     0,0,0,2,0,1)	
			  asian <- c(1,0,0,1,0,1,0,1,0,0,1,0,0,0,
                                     0,0,0,2,0,1)
                          hyp.tmp <- matrix(rbind(white, black,
                                                  latino, asian), nrow=4)
                          colnames(hyp.tmp) <- names(coef(t1.corr))[c(1:13,
                                                                      15:21)]
                          rownames(hyp.tmp) <- c("white", "black",
                                                 "latino", "asian")
                          rm(white, black, latino, asian)
                          fits <- 1/(1+exp(-hyp.tmp%*%t(beta.sim)))
			}
			## For "Transit":
			if(length(reg.tmp$coefficients)==19){

                          white <- c(1,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,2,0,1)
                          black <- c(1,1,0,0,0,1,0,1,0,0,1,0,0,0,0,0,2,0,1)
                          latino <-c(1,0,1,0,0,1,0,1,0,0,1,0,0,0,0,0,2,0,1)
                          asian <- c(1,0,0,1,0,1,0,1,0,0,1,0,0,0,0,0,2,0,1)
                          hyp.tmp <- matrix(rbind(white, black, latino, asian),
                                            nrow=4)
                          colnames(hyp.tmp) <- names(coef(t1.corr))[c(1:13,
                                                                      15:17,
                                                                      19:21)]
                          rownames(hyp.tmp) <- c("white", "black", "latino",
                                                 "asian")
                          rm(white, black, latino, asian)
                          fits <- 1/(1+exp(-hyp.tmp%*%t(beta.sim)))
			}
                      }
		mat.pr[,1]<- round(apply(fits,1,mean),3)
		mat.pr[,2:3] <- round(apply(fits,1,quant.func),3)
		fds <- matrix(NA, 4,nsims)
		rownames(fds) <- c("white", "black", "latino", "asian")
		for(kk in 1:nrow(fits)){
			fds[kk,] <- fits[kk,]-fits[1,]
		}
		mat.fd[,1] <- round(apply(fds, 1, mean),3)
		mat.fd[,2:3] <- t(round(apply(fds, 1, quant.func),3))
		trlist[[ii]] <- mat.fd 
		rownames(trlist[[ii]]) <- c("wh","bl","hi","as")
		colnames(trlist[[ii]]) <- c("mean","lb","ub")
		cat(ii,"\n")
              }
	return(trlist)
}


trlist <- trcalc(imps)
trlist.dd <-trcalc.dd(diffdis)
print(trlist)
print(trlist.dd)

typ.discalc <- function(tvec,gr){
  vals <- ws <- vector("numeric", length(tvec))
  idx <- 0
  for(jjj in tvec){
    idx <- idx+1
    vals[idx] <- trlist[[jjj]][gr,"mean"]
    ws[idx] <- length(unique(imps[[1]]$prop[imps[[1]]$type1n==jjj]))
  }
##  cat("For ", tvec, " props, ", gr, " w avg is \n", sep="")
  return(vals%*%ws/sum(ws))
  rm(vals,ws,idx)
}
  
typ.bl <- c("Housing", "Tax", "Admin", "Elections")
typ.hi <- c("Health", "Environment", "Transit")
typ.as <- c("Health", "Environment", "Tax")

print(typ.discalc(typ.bl, "bl")) ## -0.0565
print(typ.discalc(typ.hi, "hi")) ## -0.048667
print(typ.discalc(typ.as, "as")) ## -0.0624
rm(typ.bl, typ.hi, typ.as)

## FIGURE 1:
pdifsallld <- c(-.048, -.043, -.048)
cilsallld <- c(-.057, -.051, -.060)
ciusallld <- c(-.038, -.034, -.034)

pdifsallmi <- c(-.041, -.039, -.037)
cilsallmi <- c(-.050, -.047, -.051)
ciusallmi <- c(-.032, -.030, -.023)

pdifsnmld <- c(-.034, -.008, -.032)
cilsnmld <- c(-.043, -.016, -.045)
ciusnmld <- c(-.024, .001, -.018)

pdifsnmmi <- c(-.029, -.009, -.024)
cilsnmmi <- c(-.038, -.017, -.039)
ciusnmmi <- c(-.019, .000, -.009)

xls <- c(-.1,.05)
yls <- c(0,1)
ylevs <- (3:1)/4
smgap <- .05
txtgap <- .02
txtstart <- .005

## FIGURE 1, right panel:
plot(-1,-1, xlim=xls, ylim=yls, axes=F, xlab="", ylab="", main="Minority Voter Marginal Disadvantage, All Propositions
(Listwise Deletion and Multiple Imputation)")
points(pdifsallld, ylevs, pch=23)
segments(cilsallld, ylevs, ciusallld, ylevs)
points(pdifsallmi, ylevs-smgap, pch=19)
segments(cilsallmi, ylevs-smgap, ciusallmi, ylevs-smgap)
axis(1, at=c(-.05,0,.05))
abline(v=0, col="grey")
text(xls[1]+txtgap, ylevs[1], "LD")
text(xls[1]+txtgap, ylevs[1]-smgap, "MI")
text(xls[1]+txtgap, ylevs[2], "LD")
text(xls[1]+txtgap, ylevs[2]-smgap, "MI")
text(xls[1]+txtgap, ylevs[3], "LD")
text(xls[1]+txtgap, ylevs[3]-smgap, "MI")
text(xls[1]+txtstart, ylevs[1]-smgap/2, "Blacks")
text(xls[1]+txtstart, ylevs[2]-smgap/2, "Latinos")
text(xls[1]+txtstart, ylevs[3]-smgap/2, "Asians")


## FIGURE 1, left panel:
plot(-1,-1, xlim=xls, ylim=yls, axes=F, xlab="", ylab="", main="Minority Voter Marginal Disadvantage, Non-Targeted Propositions
(Listwise Deletion and Multiple Imputation)")
points(pdifsnmld, ylevs, pch=23)
segments(cilsnmld, ylevs, ciusnmld, ylevs)
points(pdifsnmmi, ylevs-smgap, pch=19)
segments(cilsnmmi, ylevs-smgap, ciusnmmi, ylevs-smgap)
axis(1, at=c(-.05,0,.05))
abline(v=0, col="grey")
text(xls[1]+txtgap, ylevs[1], "LD")
text(xls[1]+txtgap, ylevs[1]-smgap, "MI")
text(xls[1]+txtgap, ylevs[2], "LD")
text(xls[1]+txtgap, ylevs[2]-smgap, "MI")
text(xls[1]+txtgap, ylevs[3], "LD")
text(xls[1]+txtgap, ylevs[3]-smgap, "MI")
text(xls[1]+txtstart, ylevs[1]-smgap/2, "Blacks")
text(xls[1]+txtstart, ylevs[2]-smgap/2, "Latinos")
text(xls[1]+txtstart, ylevs[3]-smgap/2, "Asians")
